Setup¶

In [1]:
# Base libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# NetworkX
import networkx as nx
import osmnx as ox
# OS environment setup
from local_directories import *
In [2]:
# Reset random seeds
random_seed = 2674
# Other
neighbourhood_min_nodes = 8
max_distance = 500

Load data¶

In [3]:
# Load Leciester's graph
leicester_osmnx_graph = ox.io.load_graphml(bulk_storage_directory + "/osmnx/raw_excluded/leicester-1864.graphml")
leicester_osmnx_graph_prj = ox.project_graph(leicester_osmnx_graph)
In [4]:
len(list(leicester_osmnx_graph.nodes))
Out[4]:
13293
In [5]:
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_size=5, node_color="#000000",
    edge_color="#000000", edge_linewidth=0.1,
    bgcolor="#ffffff",
    figsize=(16, 16))
Out[5]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
In [6]:
# Convert graph to dataframe version
leicester_osmnx_graph_prj_df = None
for node in leicester_osmnx_graph_prj:
    node_dict = leicester_osmnx_graph_prj.nodes[node]
    node_dict["osmnx_node_id"] = int(node)
    # node_dict["osmnx_node_id"] = str(node)
    if leicester_osmnx_graph_prj_df is None:
        leicester_osmnx_graph_prj_df = pd.DataFrame.from_dict([node_dict])
    else:
        leicester_osmnx_graph_prj_df = pd.concat([leicester_osmnx_graph_prj_df, pd.DataFrame.from_dict([node_dict])])
leicester_osmnx_graph_prj_df.head()
Out[6]:
y x street_count elevation elevation_aster elevation_srtm lon lat osmnx_node_id ref highway
0 5.829804e+06 622151.977595 3 72.0 35 72 -1.196195 52.604506 194739 NaN NaN
0 5.829991e+06 622098.041002 3 72.0 45 72 -1.196922 52.606196 1551014281 NaN NaN
0 5.828827e+06 622259.813792 2 79.0 57 79 -1.194965 52.595696 326312 21 motorway_junction
0 5.830107e+06 622077.742140 3 79.0 43 79 -1.197179 52.607245 326320 21 motorway_junction
0 5.829673e+06 622220.645785 3 74.0 35 74 -1.195230 52.603314 2627867454 NaN NaN
In [7]:
# Load Leciester's base stats
leicester_osmnx_basic_stats = pd.read_csv(this_repo_directory + "/data/leicester-1864_basic_stats_dist500.csv")
leicester_osmnx_basic_stats = leicester_osmnx_basic_stats.rename(columns={"node_id":"osmnx_node_id"})
# Drop NAs created when ego-graph has less than 8 nodes
leicester_osmnx_basic_stats = leicester_osmnx_basic_stats.dropna(subset=["osmnx_node_id"])
leicester_osmnx_basic_stats.head()
Out[7]:
0 n m k_avg edge_length_total edge_length_avg streets_per_node_avg streets_per_node_counts streets_per_node_proportions intersection_count street_length_total street_segment_count street_length_avg circuity_avg self_loop_proportion osmnx_node_id
7 NaN 11.0 11.0 2.0 1261.861 114.714636 3.0 {0: 0, 1: 0, 2: 0, 3: 11} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 11.0 1261.861 11.0 114.714636 1.038343 0.0 337976.0
8 NaN 13.0 13.0 2.0 2126.471 163.574692 3.0 {0: 0, 1: 0, 2: 0, 3: 13} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 13.0 2126.471 13.0 163.574692 1.030988 0.0 337979.0
9 NaN 14.0 14.0 2.0 1870.996 133.642571 3.0 {0: 0, 1: 0, 2: 0, 3: 14} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 14.0 1870.996 14.0 133.642571 1.048630 0.0 337983.0
10 NaN 14.0 14.0 2.0 1815.929 129.709214 3.0 {0: 0, 1: 0, 2: 0, 3: 14} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 14.0 1815.929 14.0 129.709214 1.050192 0.0 337985.0
11 NaN 14.0 14.0 2.0 1870.996 133.642571 3.0 {0: 0, 1: 0, 2: 0, 3: 14} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 14.0 1870.996 14.0 133.642571 1.048630 0.0 337986.0
In [8]:
# Load Leciester's embeddings
leicester_emb_df = pd.read_csv(this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5.csv")
leicester_emb_df.head()
Out[8]:
osmnx_node_id EMB000 EMB001
0 337976 -0.212304 -0.563564
1 337979 -0.322662 -0.882213
2 337983 -0.009132 0.948856
3 337985 -0.136350 0.965531
4 337986 -0.203456 0.447374

Explore embeddings¶

In [9]:
fig = px.scatter(
    leicester_emb_df,
    x="EMB000",
    y="EMB001",
    hover_data=['osmnx_node_id'],
    width=800, height=800
)
fig.update_layout({"plot_bgcolor": "#ffffff"})
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.show()
In [10]:
for node in leicester_osmnx_graph_prj.nodes:
    if len(leicester_emb_df[leicester_emb_df["osmnx_node_id"] == node]["EMB000"].values) == 0:
        leicester_osmnx_graph_prj.nodes[node]["EMB000"] = None
        leicester_osmnx_graph_prj.nodes[node]["EMB001"] = None
    else:
        leicester_osmnx_graph_prj.nodes[node]["EMB000"] = float(leicester_emb_df[leicester_emb_df["osmnx_node_id"] == node]["EMB000"].values)
        leicester_osmnx_graph_prj.nodes[node]["EMB001"] = float(leicester_emb_df[leicester_emb_df["osmnx_node_id"] == node]["EMB001"].values)
In [11]:
ox.plot_graph(leicester_osmnx_graph_prj, node_color=[
    leicester_osmnx_graph_prj.nodes[node]["EMB000"] for node in leicester_osmnx_graph_prj.nodes],
    node_size=10, bgcolor="#ffffff",
    figsize=(16, 16))
Out[11]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
In [12]:
ox.plot_graph(leicester_osmnx_graph_prj, node_color=[
    leicester_osmnx_graph_prj.nodes[node]["EMB001"] for node in leicester_osmnx_graph_prj.nodes],
    node_size=10, bgcolor="#ffffff",
    figsize=(16, 16))
Out[12]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)

Correlations with basic stats¶

Checking correlations between EMB000 and EMB001 and all the basic statistics from OSMnx. No clear correlation found.

In [13]:
leicester_closeness_centrality = pd.DataFrame.from_dict(
    nx.closeness_centrality(leicester_osmnx_graph),
    orient='index',
    columns=['closeness_networkwide'])
leicester_closeness_centrality['osmnx_node_id'] = leicester_closeness_centrality.index
leicester_closeness_centrality.head()
Out[13]:
closeness_networkwide osmnx_node_id
194739 0.000188 194739
326312 0.000000 326312
326313 0.015414 326313
326320 0.000150 326320
326321 0.000145 326321
In [14]:
leicester_betweenness_centrality = pd.DataFrame.from_dict(
    nx.betweenness_centrality(leicester_osmnx_graph),
    orient='index',
    columns=['betweenness_networkwide'])
leicester_betweenness_centrality['osmnx_node_id'] = leicester_betweenness_centrality.index
leicester_betweenness_centrality.head()
Out[14]:
betweenness_networkwide osmnx_node_id
194739 1.132093e-07 194739
326312 0.000000e+00 326312
326313 0.000000e+00 326313
326320 2.983064e-04 326320
326321 2.830232e-08 326321
In [15]:
leicester_pairplot_df = leicester_osmnx_basic_stats[
    ["osmnx_node_id","n", "m", "k_avg", "edge_length_total", "edge_length_avg",
    "streets_per_node_avg", "intersection_count", "street_length_total",
    "street_segment_count", "street_length_avg", "circuity_avg"]].merge(
        leicester_closeness_centrality,
        on="osmnx_node_id"
    ).merge(
        leicester_betweenness_centrality,
        on="osmnx_node_id"
    ).merge(
        leicester_emb_df[["osmnx_node_id", "EMB000", "EMB001"]],
        on="osmnx_node_id"
    )
In [16]:
leicester_pairplot_df_withpooled = pd.merge(
    leicester_pairplot_df,
    pd.read_csv(this_repo_directory + "/data/leicester-1864_emb-pool_gnnuf_model_v0-5.csv").rename(columns={"EMB000":"EMB000pooled", "EMB001":"EMB001pooled"}),
    on="osmnx_node_id"
    )
In [17]:
print(leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="kendall"))
                                n         m     k_avg  edge_length_total   
n                        1.000000  0.891733  0.013712           0.589867  \
m                        0.891733  1.000000  0.132379           0.665304   
k_avg                    0.013712  0.132379  1.000000           0.305397   
edge_length_total        0.589867  0.665304  0.305397           1.000000   
edge_length_avg         -0.296360 -0.234006  0.292995           0.104432   
streets_per_node_avg     0.097850  0.117131  0.151434           0.321869   
intersection_count       0.826980  0.810128  0.057609           0.674507   
street_length_total      0.617244  0.656864  0.211502           0.879629   
street_segment_count     0.902224  0.878092  0.040801           0.635415   
street_length_avg       -0.292572 -0.224905  0.323991           0.108293   
circuity_avg            -0.118638 -0.118920 -0.000375          -0.089911   
closeness_networkwide    0.047026  0.058577  0.090216           0.243250   
betweenness_networkwide  0.268942  0.266156  0.047262           0.369444   
EMB000                  -0.033401  0.013223  0.260863           0.210405   
EMB001                  -0.103946 -0.100975  0.005101          -0.130952   
EMB000pooled            -0.138311 -0.068330  0.376507           0.208221   
EMB001pooled            -0.226253 -0.212787  0.036726          -0.245809   

                         edge_length_avg  streets_per_node_avg   
n                              -0.296360              0.097850  \
m                              -0.234006              0.117131   
k_avg                           0.292995              0.151434   
edge_length_total               0.104432              0.321869   
edge_length_avg                 1.000000              0.340156   
streets_per_node_avg            0.340156              1.000000   
intersection_count             -0.166574              0.279544   
street_length_total             0.080524              0.389084   
street_segment_count           -0.232123              0.197164   
street_length_avg               0.904994              0.316873   
circuity_avg                    0.048389             -0.153925   
closeness_networkwide           0.308200              0.448797   
betweenness_networkwide         0.140399              0.226000   
EMB000                          0.370231              0.280222   
EMB001                         -0.044676             -0.231995   
EMB000pooled                    0.579544              0.430593   
EMB001pooled                   -0.021938             -0.420593   

                         intersection_count  street_length_total   
n                                  0.826980             0.617244  \
m                                  0.810128             0.656864   
k_avg                              0.057609             0.211502   
edge_length_total                  0.674507             0.879629   
edge_length_avg                   -0.166574             0.080524   
streets_per_node_avg               0.279544             0.389084   
intersection_count                 1.000000             0.739393   
street_length_total                0.739393             1.000000   
street_segment_count               0.910502             0.687454   
street_length_avg                 -0.170992             0.084041   
circuity_avg                      -0.143807            -0.106058   
closeness_networkwide              0.157045             0.284100   
betweenness_networkwide            0.319792             0.390078   
EMB000                             0.047278             0.191975   
EMB001                            -0.143725            -0.163191   
EMB000pooled                      -0.019241             0.190095   
EMB001pooled                      -0.302492            -0.315328   

                         street_segment_count  street_length_avg   
n                                    0.902224          -0.292572  \
m                                    0.878092          -0.224905   
k_avg                                0.040801           0.323991   
edge_length_total                    0.635415           0.108293   
edge_length_avg                     -0.232123           0.904994   
streets_per_node_avg                 0.197164           0.316873   
intersection_count                   0.910502          -0.170992   
street_length_total                  0.687454           0.084041   
street_segment_count                 1.000000          -0.233779   
street_length_avg                   -0.233779           1.000000   
circuity_avg                        -0.132164           0.043635   
closeness_networkwide                0.107291           0.301521   
betweenness_networkwide              0.295352           0.126387   
EMB000                               0.008809           0.365477   
EMB001                              -0.133773          -0.044273   
EMB000pooled                        -0.069979           0.588756   
EMB001pooled                        -0.284915          -0.014573   

                         circuity_avg  closeness_networkwide   
n                           -0.118638               0.047026  \
m                           -0.118920               0.058577   
k_avg                       -0.000375               0.090216   
edge_length_total           -0.089911               0.243250   
edge_length_avg              0.048389               0.308200   
streets_per_node_avg        -0.153925               0.448797   
intersection_count          -0.143807               0.157045   
street_length_total         -0.106058               0.284100   
street_segment_count        -0.132164               0.107291   
street_length_avg            0.043635               0.301521   
circuity_avg                 1.000000              -0.116848   
closeness_networkwide       -0.116848               1.000000   
betweenness_networkwide     -0.028838               0.245383   
EMB000                      -0.028421               0.262182   
EMB001                       0.131059              -0.194019   
EMB000pooled                -0.065721               0.364707   
EMB001pooled                 0.225035              -0.337499   

                         betweenness_networkwide    EMB000    EMB001   
n                                       0.268942 -0.033401 -0.103946  \
m                                       0.266156  0.013223 -0.100975   
k_avg                                   0.047262  0.260863  0.005101   
edge_length_total                       0.369444  0.210405 -0.130952   
edge_length_avg                         0.140399  0.370231 -0.044676   
streets_per_node_avg                    0.226000  0.280222 -0.231995   
intersection_count                      0.319792  0.047278 -0.143725   
street_length_total                     0.390078  0.191975 -0.163191   
street_segment_count                    0.295352  0.008809 -0.133773   
street_length_avg                       0.126387  0.365477 -0.044273   
circuity_avg                           -0.028838 -0.028421  0.131059   
closeness_networkwide                   0.245383  0.262182 -0.194019   
betweenness_networkwide                 1.000000  0.241848 -0.025826   
EMB000                                  0.241848  1.000000 -0.050229   
EMB001                                 -0.025826 -0.050229  1.000000   
EMB000pooled                            0.117201  0.414767 -0.104923   
EMB001pooled                           -0.155374 -0.117148  0.393005   

                         EMB000pooled  EMB001pooled  
n                           -0.138311     -0.226253  
m                           -0.068330     -0.212787  
k_avg                        0.376507      0.036726  
edge_length_total            0.208221     -0.245809  
edge_length_avg              0.579544     -0.021938  
streets_per_node_avg         0.430593     -0.420593  
intersection_count          -0.019241     -0.302492  
street_length_total          0.190095     -0.315328  
street_segment_count        -0.069979     -0.284915  
street_length_avg            0.588756     -0.014573  
circuity_avg                -0.065721      0.225035  
closeness_networkwide        0.364707     -0.337499  
betweenness_networkwide      0.117201     -0.155374  
EMB000                       0.414767     -0.117148  
EMB001                      -0.104923      0.393005  
EMB000pooled                 1.000000     -0.170566  
EMB001pooled                -0.170566      1.000000  
In [18]:
# Double-checking difference with Spearman's rho
print(leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="spearman"))
                                n         m     k_avg  edge_length_total   
n                        1.000000  0.980338  0.015981           0.784142  \
m                        0.980338  1.000000  0.188244           0.851935   
k_avg                    0.015981  0.188244  1.000000           0.432809   
edge_length_total        0.784142  0.851935  0.432809           1.000000   
edge_length_avg         -0.425638 -0.341298  0.432676           0.160137   
streets_per_node_avg     0.144189  0.172351  0.206537           0.464561   
intersection_count       0.953861  0.944876  0.071353           0.857755   
street_length_total      0.806319  0.842110  0.292745           0.972294   
street_segment_count     0.983940  0.971088  0.050415           0.827154   
street_length_avg       -0.420878 -0.328617  0.474856           0.165242   
circuity_avg            -0.173852 -0.173620 -0.001432          -0.132332   
closeness_networkwide    0.069174  0.087432  0.129970           0.360226   
betweenness_networkwide  0.382596  0.380400  0.066577           0.529267   
EMB000                  -0.049975  0.021164  0.388864           0.320914   
EMB001                  -0.157148 -0.152845  0.003666          -0.202643   
EMB000pooled            -0.204113 -0.101058  0.545514           0.310028   
EMB001pooled            -0.329446 -0.310380  0.045068          -0.358725   

                         edge_length_avg  streets_per_node_avg   
n                              -0.425638              0.144189  \
m                              -0.341298              0.172351   
k_avg                           0.432676              0.206537   
edge_length_total               0.160137              0.464561   
edge_length_avg                 1.000000              0.499462   
streets_per_node_avg            0.499462              1.000000   
intersection_count             -0.240841              0.406843   
street_length_total             0.128629              0.558292   
street_segment_count           -0.338067              0.291215   
street_length_avg               0.983105              0.469362   
circuity_avg                    0.067899             -0.230816   
closeness_networkwide           0.456250              0.641996   
betweenness_networkwide         0.203815              0.326100   
EMB000                          0.534784              0.414339   
EMB001                         -0.066179             -0.348581   
EMB000pooled                    0.775982              0.610777   
EMB001pooled                   -0.038231             -0.605623   

                         intersection_count  street_length_total   
n                                  0.953861             0.806319  \
m                                  0.944876             0.842110   
k_avg                              0.071353             0.292745   
edge_length_total                  0.857755             0.972294   
edge_length_avg                   -0.240841             0.128629   
streets_per_node_avg               0.406843             0.558292   
intersection_count                 1.000000             0.904625   
street_length_total                0.904625             1.000000   
street_segment_count               0.985890             0.867512   
street_length_avg                 -0.248148             0.133033   
circuity_avg                      -0.209816            -0.156784   
closeness_networkwide              0.233107             0.418098   
betweenness_networkwide            0.454251             0.557107   
EMB000                             0.072714             0.294263   
EMB001                            -0.218416            -0.253735   
EMB000pooled                      -0.026976             0.286724   
EMB001pooled                      -0.434794            -0.456843   

                         street_segment_count  street_length_avg   
n                                    0.983940          -0.420878  \
m                                    0.971088          -0.328617   
k_avg                                0.050415           0.474856   
edge_length_total                    0.827154           0.165242   
edge_length_avg                     -0.338067           0.983105   
streets_per_node_avg                 0.291215           0.469362   
intersection_count                   0.985890          -0.248148   
street_length_total                  0.867512           0.133033   
street_segment_count                 1.000000          -0.340965   
street_length_avg                   -0.340965           1.000000   
circuity_avg                        -0.192780           0.060435   
closeness_networkwide                0.159911           0.448151   
betweenness_networkwide              0.420247           0.183958   
EMB000                               0.014611           0.528989   
EMB001                              -0.203271          -0.065379   
EMB000pooled                        -0.103784           0.785537   
EMB001pooled                        -0.410763          -0.026850   

                         circuity_avg  closeness_networkwide   
n                           -0.173852               0.069174  \
m                           -0.173620               0.087432   
k_avg                       -0.001432               0.129970   
edge_length_total           -0.132332               0.360226   
edge_length_avg              0.067899               0.456250   
streets_per_node_avg        -0.230816               0.641996   
intersection_count          -0.209816               0.233107   
street_length_total         -0.156784               0.418098   
street_segment_count        -0.192780               0.159911   
street_length_avg            0.060435               0.448151   
circuity_avg                 1.000000              -0.176367   
closeness_networkwide       -0.176367               1.000000   
betweenness_networkwide     -0.041296               0.350389   
EMB000                      -0.043169               0.388640   
EMB001                       0.195740              -0.292064   
EMB000pooled                -0.100317               0.537505   
EMB001pooled                 0.327021              -0.505571   

                         betweenness_networkwide    EMB000    EMB001   
n                                       0.382596 -0.049975 -0.157148  \
m                                       0.380400  0.021164 -0.152845   
k_avg                                   0.066577  0.388864  0.003666   
edge_length_total                       0.529267  0.320914 -0.202643   
edge_length_avg                         0.203815  0.534784 -0.066179   
streets_per_node_avg                    0.326100  0.414339 -0.348581   
intersection_count                      0.454251  0.072714 -0.218416   
street_length_total                     0.557107  0.294263 -0.253735   
street_segment_count                    0.420247  0.014611 -0.203271   
street_length_avg                       0.183958  0.528989 -0.065379   
circuity_avg                           -0.041296 -0.043169  0.195740   
closeness_networkwide                   0.350389  0.388640 -0.292064   
betweenness_networkwide                 1.000000  0.354062 -0.053996   
EMB000                                  0.354062  1.000000 -0.077134   
EMB001                                 -0.053996 -0.077134  1.000000   
EMB000pooled                            0.170386  0.598860 -0.161475   
EMB001pooled                           -0.225828 -0.184985  0.569221   

                         EMB000pooled  EMB001pooled  
n                           -0.204113     -0.329446  
m                           -0.101058     -0.310380  
k_avg                        0.545514      0.045068  
edge_length_total            0.310028     -0.358725  
edge_length_avg              0.775982     -0.038231  
streets_per_node_avg         0.610777     -0.605623  
intersection_count          -0.026976     -0.434794  
street_length_total          0.286724     -0.456843  
street_segment_count        -0.103784     -0.410763  
street_length_avg            0.785537     -0.026850  
circuity_avg                -0.100317      0.327021  
closeness_networkwide        0.537505     -0.505571  
betweenness_networkwide      0.170386     -0.225828  
EMB000                       0.598860     -0.184985  
EMB001                      -0.161475      0.569221  
EMB000pooled                 1.000000     -0.270270  
EMB001pooled                -0.270270      1.000000  
In [19]:
sns.pairplot(leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]), kind="hist")
Out[19]:
<seaborn.axisgrid.PairGrid at 0x1aa2e57e0>
In [20]:
leicester_pairplot_df_withpooled.to_csv(this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5_with-node-and-basic-stats.csv", index=False)

Check specific nodes¶

Checking the ego-network of the nodes with the most extreme values in each direction (min and max) for both UMAP0 and UMAP1

In [21]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 6782625866]
Out[21]:
osmnx_node_id EMB000 EMB001
12711 6782625866 -0.999753 -0.943405
In [22]:
ego_6782625866 = nx.generators.ego_graph(
    leicester_osmnx_graph, 6782625866,
    radius=max_distance, undirected=True, distance="length")
ego_6782625866_prj = ox.project_graph(ego_6782625866)
ox.plot_graph(
    ego_6782625866_prj,
    node_size=[20 if node == 6782625866 else 5 for node in ego_6782625866_prj.nodes],
    node_color=["#e41a1c" if node == 6782625866 else "#ffffff" for node in ego_6782625866_prj.nodes],
    figsize=(5, 5)
)
Out[22]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [23]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 354554417]
Out[23]:
osmnx_node_id EMB000 EMB001
4733 354554417 -0.966505 0.982919
In [24]:
ego_354554417 = nx.generators.ego_graph(
    leicester_osmnx_graph, 354554417,
    radius=max_distance, undirected=True, distance="length")
ego_354554417_prj = ox.project_graph(ego_354554417)
ox.plot_graph(
    ego_354554417_prj,
    node_size=[20 if node == 354554417 else 5 for node in ego_354554417_prj.nodes],
    node_color=["#e41a1c" if node == 354554417 else "#ffffff" for node in ego_354554417_prj.nodes],
    figsize=(5, 5)
)
Out[24]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [25]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 1179199412]
Out[25]:
osmnx_node_id EMB000 EMB001
8021 1179199412 0.999113 0.99909
In [26]:
ego_1179199412 = nx.generators.ego_graph(
    leicester_osmnx_graph, 1179199412,
    radius=max_distance, undirected=True, distance="length")
ego_1179199412_prj = ox.project_graph(ego_1179199412)
ox.plot_graph(
    ego_1179199412_prj,
    node_size=[20 if node == 1179199412 else 5 for node in ego_1179199412_prj.nodes],
    node_color=["#e41a1c" if node == 1179199412 else "#ffffff" for node in ego_1179199412_prj.nodes],
    figsize=(5, 5)
)
Out[26]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [27]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 2858142815]
Out[27]:
osmnx_node_id EMB000 EMB001
11121 2858142815 0.002341 0.165836
In [28]:
ego_2858142815 = nx.generators.ego_graph(
    leicester_osmnx_graph, 2858142815,
    radius=max_distance, undirected=True, distance="length")
ego_2858142815_prj = ox.project_graph(ego_2858142815)
ox.plot_graph(
    ego_2858142815_prj,
    node_size=[20 if node == 2858142815 else 5 for node in ego_2858142815_prj.nodes],
    node_color=["#e41a1c" if node == 2858142815 else "#ffffff" for node in ego_2858142815_prj.nodes],
    figsize=(5, 5)
)
Out[28]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [29]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 296162322]
Out[29]:
osmnx_node_id EMB000 EMB001
3845 296162322 0.99999 -0.999946
In [30]:
ego_296162322 = nx.generators.ego_graph(
    leicester_osmnx_graph, 296162322,
    radius=max_distance, undirected=True, distance="length")
ego_296162322_prj = ox.project_graph(ego_296162322)
ox.plot_graph(
    ego_296162322_prj,
    node_size=[20 if node == 296162322 else 5 for node in ego_296162322_prj.nodes],
    node_color=["#e41a1c" if node == 296162322 else "#ffffff" for node in ego_296162322_prj.nodes],
    figsize=(5, 5)
)
Out[30]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)

Explore patterns¶

In [31]:
leicester_emb_patters_df = leicester_emb_df.copy()
In [32]:
from sklearn.cluster import DBSCAN
leicester_emb_df_clust = leicester_emb_df[["EMB000", "EMB001"]].dropna()
clust = DBSCAN(eps=0.11, min_samples=300)
leicester_emb_patters_df["clust"] = clust.fit_predict(leicester_emb_df_clust)
leicester_emb_patters_df["clust"].nunique()
Out[32]:
8
In [33]:
colorbrewer_set1 = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628", "#f781bf", "#999999"]
leicester_emb_patters_df["clust_colour"] = leicester_emb_patters_df["clust"].apply(lambda x: colorbrewer_set1[x])
leicester_emb_patters_df.head()
Out[33]:
osmnx_node_id EMB000 EMB001 clust clust_colour
0 337976 -0.212304 -0.563564 -1 #999999
1 337979 -0.322662 -0.882213 -1 #999999
2 337983 -0.009132 0.948856 -1 #999999
3 337985 -0.136350 0.965531 -1 #999999
4 337986 -0.203456 0.447374 -1 #999999
In [34]:
plt.figure(figsize=(7,7))
ax = plt.axes()
ax.set_facecolor("white")
plt.scatter(
    x=leicester_emb_patters_df.EMB000,
    y=leicester_emb_patters_df.EMB001,
    c=leicester_emb_patters_df.clust_colour,
    s=5, edgecolors='black', linewidth=0.1)
plt.xlabel("Embeddings first dimension")
plt.ylabel("Embeddings second dimension")
plt.show()
In [35]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=leicester_emb_patters_df.EMB000,
    y=leicester_emb_patters_df.EMB001,
    mode='markers',
    marker=dict(color=leicester_emb_patters_df.clust_colour)
))
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.show()
In [36]:
def bivariate_colour(x, limits):
    if x[0] is None or x[1] is None:
        return None
    else:
        if x[0] <= limits[0, 0]:
            if x[1] <= limits[1, 0]:
                # return "#e8e8e8"
                return "#e8e8e8"
            elif x[1] <= limits[1, 1]:
                # return "#cbb8d7"
                return "#e4acac"
            else:
                # return "#9972af"
                return "#c85a5a"
        if x[0] <= limits[0, 1]:
            if x[1] <= limits[1, 0]:
                # return "#e4d9ac"
                return "#b0d5df"
            elif x[1] <= limits[1, 1]:
                # return "#c8ada0"
                return "#ad9ea5"
            else:
                # return "#976b82"
                return "#985356"
        else:
            if x[1] <= limits[1, 0]:
                # return "#c8b35a"
                return "#64acbe"
            elif x[1] <= limits[1, 1]:
                # return "#af8e53"
                return "#627f8c"
            else:
                # return "#804d36"
                return "#574249"

leicester_emb_quantiles = leicester_emb_df[["EMB000", "EMB001"]].quantile([1/3, 2/3]).values.transpose()
leicester_emb_patters_df["bivariate_colour"] = leicester_emb_patters_df.apply(
    lambda x: bivariate_colour([x["EMB000"], x["EMB001"]], leicester_emb_quantiles), axis=1
)
leicester_emb_patters_df.head()
Out[36]:
osmnx_node_id EMB000 EMB001 clust clust_colour bivariate_colour
0 337976 -0.212304 -0.563564 -1 #999999 #e8e8e8
1 337979 -0.322662 -0.882213 -1 #999999 #e8e8e8
2 337983 -0.009132 0.948856 -1 #999999 #985356
3 337985 -0.136350 0.965531 -1 #999999 #c85a5a
4 337986 -0.203456 0.447374 -1 #999999 #e4acac
In [37]:
plt.figure(figsize=(7,7))
ax = plt.axes()
ax.set_facecolor("white")
plt.scatter(
    x=leicester_emb_patters_df.EMB000,
    y=leicester_emb_patters_df.EMB001,
    c=leicester_emb_patters_df.bivariate_colour,
    s=10, edgecolors='black', linewidth=0.1)
plt.xlabel("Embeddings first dimension")
plt.ylabel("Embeddings second dimension")
plt.show()
In [38]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=leicester_emb_patters_df.EMB000,
    y=leicester_emb_patters_df.EMB001,
    mode='markers',
    marker=dict(color=leicester_emb_patters_df.bivariate_colour)
))
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#cccccc', zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc')
fig.show()
In [39]:
leicester_osmnx_bivariate = leicester_osmnx_graph_prj.copy()

for node in leicester_osmnx_bivariate.nodes:
    node_bivariate_colour = leicester_emb_patters_df.loc[leicester_emb_patters_df["osmnx_node_id"] == node]
    if node_bivariate_colour.empty:
        leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] = "#000000"
        leicester_osmnx_bivariate.nodes[node]["clust_colour"] = "#000000"
        leicester_osmnx_bivariate.nodes[node]["node_size"] = 1
    else:
        leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] = node_bivariate_colour["bivariate_colour"].values[0]
        leicester_osmnx_bivariate.nodes[node]["clust_colour"] = node_bivariate_colour["clust_colour"].values[0]
        leicester_osmnx_bivariate.nodes[node]["node_size"] = 7
In [40]:
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
    node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 1 for node in leicester_osmnx_bivariate.nodes],
    bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
    figsize=(12, 12))
Out[40]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
In [41]:
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*8 if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 8 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(48, 48))
In [42]:
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
    node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"] for node in leicester_osmnx_bivariate.nodes],
    bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
    figsize=(12, 12))
Out[42]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
In [43]:
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*2 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(24, 24))
In [44]:
import geopandas as gpd
leicester_gdf = gpd.GeoDataFrame(
    leicester_osmnx_graph_prj_df,
    geometry=gpd.points_from_xy(
        leicester_osmnx_graph_prj_df.lon,
        leicester_osmnx_graph_prj_df.lat
    ),
    crs="EPSG:4326"
).merge(leicester_emb_patters_df, on='osmnx_node_id', how='left').merge(leicester_osmnx_basic_stats, on='osmnx_node_id', how='left')
leicester_gdf.head()
Out[44]:
y x street_count elevation elevation_aster elevation_srtm lon lat osmnx_node_id ref ... edge_length_avg streets_per_node_avg streets_per_node_counts streets_per_node_proportions intersection_count street_length_total street_segment_count street_length_avg circuity_avg self_loop_proportion
0 5.829804e+06 622151.977595 3 72.0 35 72 -1.196195 52.604506 194739 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 5.829991e+06 622098.041002 3 72.0 45 72 -1.196922 52.606196 1551014281 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 5.828827e+06 622259.813792 2 79.0 57 79 -1.194965 52.595696 326312 21 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 5.830107e+06 622077.742140 3 79.0 43 79 -1.197179 52.607245 326320 21 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 5.829673e+06 622220.645785 3 74.0 35 74 -1.195230 52.603314 2627867454 NaN ... 160.60625 3.0 {0: 0, 1: 0, 2: 0, 3: 11} {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} 11.0 1927.275 12.0 160.60625 1.021342 0.0

5 rows × 32 columns

In [45]:
leicester_gdf[leicester_gdf["clust_colour"]!=colorbrewer_set1[-1]].dropna(subset=["EMB000"]).explore(
    color="clust_colour",
    marker_kwds={"radius": 7}, style_kwds={"stroke": False},
    tiles="Stamen Toner"
)
Out[45]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [46]:
leicester_gdf[leicester_gdf["bivariate_colour"]!="#000000"].dropna(subset=["EMB000"]).explore(
    color="bivariate_colour",
    marker_kwds={"radius": 7}, style_kwds={"stroke": False},
    legend=True,
    tiles="Stamen Toner"
)
Out[46]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [46]: